# -*- coding: utf-8 -*-
import scrapy


class LuoxiaSpider(scrapy.Spider):
    """Spider that scrapes novel chapters from luoxia.com.

    Starting from a single chapter page, it yields one item per chapter
    (``title`` plus a list of paragraph strings in ``content``) and follows
    the "next" link until no further page exists.
    """

    name = 'luoxia'
    # allowed_domains = ['luoxia.com']
    start_urls = ['https://www.luoxia.com/meizhe/79400.htm']

    # Simple anti-scraping countermeasure: present a normal browser's
    # User-Agent and Accept headers on every request.
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"
        ,
        "Accept":
        "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9"
    }

    def start_requests(self):
        # Scrapy's default start_requests() does NOT use the ``headers``
        # class attribute, so without this override the very first request
        # would go out without the anti-bot headers defined above.
        for url in self.start_urls:
            yield scrapy.Request(url=url, callback=self.parse, headers=self.headers)

    def parse(self, response):
        """Extract one chapter and schedule the next page.

        :param response: the chapter page response.
        :yields: a dict item with ``title`` and ``content``, then a
                 follow-up request for the next chapter if one exists.
        """
        contents = response.xpath('//article[@class="post clearfix"]')
        yield {
            # Chapter title; .get() returns None instead of raising
            # IndexError when the node is missing.
            "title": contents.xpath('header/h1[@id="nr_title"]/text()').get(),
            # Chapter body as a list of paragraph strings.
            "content": contents.xpath('div[@id="nr1"]/p/text()').getall()
        }
        # The page carries two identical nav bars (top and bottom); the
        # original code took index [1], i.e. the second "next" link.
        next_urls = response.xpath('//nav/ul/li[@class="next"]/a/@href').getall()
        if len(next_urls) > 1:
            # Guard against the last chapter, where no "next" link exists;
            # previously this raised IndexError and aborted the crawl.
            yield scrapy.Request(url=next_urls[1], callback=self.parse,
                                 headers=self.headers)